1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package net.sf.jmimemagic;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27
28 import org.xml.sax.Attributes;
29 import org.xml.sax.ContentHandler;
30 import org.xml.sax.ErrorHandler;
31 import org.xml.sax.SAXException;
32 import org.xml.sax.SAXNotRecognizedException;
33 import org.xml.sax.SAXNotSupportedException;
34 import org.xml.sax.SAXParseException;
35 import org.xml.sax.XMLReader;
36 import org.xml.sax.helpers.DefaultHandler;
37 import org.xml.sax.helpers.XMLReaderFactory;
38
39 import java.io.ByteArrayOutputStream;
40
41 import java.nio.ByteBuffer;
42
43 import java.util.ArrayList;
44 import java.util.Collection;
45 import java.util.HashMap;
46
47
48 /***
49 * DOCUMENT ME!
50 *
51 * @author $Author$
52 * @version $Revision$
53 */
54 public class MagicParser extends DefaultHandler implements ContentHandler, ErrorHandler
55 {
56 private static String magicFile = "/magic.xml";
57 private static Log log = LogFactory.getLog(MagicParser.class);
58
59
60 protected static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces";
61
62
63 protected static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation";
64
65
66 protected static final String SCHEMA_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/schema";
67
68
69 protected static final String SCHEMA_FULL_CHECKING_FEATURE_ID = "http://apache.org/xml/features/validation/schema-full-checking";
70
71
72 protected static final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser";
73
74
75 protected static final boolean DEFAULT_NAMESPACES = true;
76
77
78 protected static final boolean DEFAULT_VALIDATION = false;
79
80
81 protected static final boolean DEFAULT_SCHEMA_VALIDATION = false;
82
83
84 protected static final boolean DEFAULT_SCHEMA_FULL_CHECKING = false;
85 private boolean initialized = false;
86 private XMLReader parser = null;
87 private ArrayList stack = new ArrayList();
88 private Collection matchers = new ArrayList();
89 private MagicMatcher matcher = null;
90 private MagicMatch match = null;
91 private HashMap properties = null;
92 private String finalValue = "";
93 private boolean isMimeType = false;
94 private boolean isExtension = false;
95 private boolean isDescription = false;
96 private boolean isTest = false;
97
98 /***
99 * constructor
100 */
101 public MagicParser()
102 {
103 log.debug("instantiated");
104 }
105
106 /***
107 * parse the xml file and create our MagicMatcher object list
108 *
109 * @throws MagicParseException DOCUMENT ME!
110 */
111 public synchronized void initialize()
112 throws MagicParseException
113 {
114 boolean namespaces = DEFAULT_NAMESPACES;
115 boolean validation = DEFAULT_VALIDATION;
116 boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION;
117 boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING;
118
119 if (!initialized) {
120
121 try {
122 parser = XMLReaderFactory.createXMLReader();
123 } catch (Exception e) {
124 try {
125 log.debug("falling back to default parser: " + DEFAULT_PARSER_NAME);
126 parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER_NAME);
127 } catch (Exception ee) {
128 throw new MagicParseException("unable to instantiate parser");
129 }
130 }
131
132
133 try {
134 parser.setFeature(NAMESPACES_FEATURE_ID, namespaces);
135 } catch (SAXException e) {
136 log.debug("initialize(): warning: Parser does not support feature (" +
137 NAMESPACES_FEATURE_ID + ")");
138 }
139
140 try {
141 parser.setFeature(VALIDATION_FEATURE_ID, validation);
142 } catch (SAXException e) {
143 log.debug("initialize(): warning: Parser does not support feature (" +
144 VALIDATION_FEATURE_ID + ")");
145 }
146
147 try {
148 parser.setFeature(SCHEMA_VALIDATION_FEATURE_ID, schemaValidation);
149 } catch (SAXNotRecognizedException e) {
150
151 } catch (SAXNotSupportedException e) {
152 log.debug("initialize(): warning: Parser does not support feature (" +
153 SCHEMA_VALIDATION_FEATURE_ID + ")");
154 }
155
156 try {
157 parser.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking);
158 } catch (SAXNotRecognizedException e) {
159
160 } catch (SAXNotSupportedException e) {
161 log.debug("initialize(): warning: Parser does not support feature (" +
162 SCHEMA_FULL_CHECKING_FEATURE_ID + ")");
163 }
164
165
166 parser.setErrorHandler(this);
167 parser.setContentHandler(this);
168
169
170 try {
171
172 String magicURL = MagicParser.class.getResource(magicFile).toString();
173
174 if (magicURL == null) {
175 log.error("initialize(): couldn't load '" + magicURL + "'");
176 throw new MagicParseException("couldn't load '" + magicURL + "'");
177 }
178
179 parser.parse(magicURL);
180 } catch (SAXParseException e) {
181
182 } catch (Exception e) {
183 e.printStackTrace();
184 throw new MagicParseException("parse error occurred - " + e.getMessage());
185 }
186
187 initialized = true;
188 }
189 }
190
191 /***
192 * DOCUMENT ME!
193 *
194 * @return DOCUMENT ME!
195 */
196 public Collection getMatchers()
197 {
198 return matchers;
199 }
200
201 /***
202 * DOCUMENT ME!
203 *
204 * @throws SAXException DOCUMENT ME!
205 */
206 public void startDocument()
207 throws SAXException
208 {
209 log.debug("startDocument()");
210 }
211
212 /***
213 * DOCUMENT ME!
214 *
215 * @throws SAXException DOCUMENT ME!
216 */
217 public void endDocument()
218 throws SAXException
219 {
220 log.debug("endDocument()");
221 }
222
223 /***
224 * DOCUMENT ME!
225 *
226 * @param target DOCUMENT ME!
227 * @param data DOCUMENT ME!
228 *
229 * @throws SAXException DOCUMENT ME!
230 */
231 public void processingInstruction(String target, String data)
232 throws SAXException
233 {
234
235 }
236
237 /***
238 * DOCUMENT ME!
239 *
240 * @param ch DOCUMENT ME!
241 * @param offset DOCUMENT ME!
242 * @param length DOCUMENT ME!
243 *
244 * @throws SAXException DOCUMENT ME!
245 */
246 public void characters(char[] ch, int offset, int length)
247 throws SAXException
248 {
249 String value = new String(ch, offset, length);
250 log.debug("characters(): value is '" + value + "'");
251
252 finalValue += value;
253 }
254
255 /***
256 * DOCUMENT ME!
257 *
258 * @param ch DOCUMENT ME!
259 * @param offset DOCUMENT ME!
260 * @param length DOCUMENT ME!
261 *
262 * @throws SAXException DOCUMENT ME!
263 */
264 public void ignorableWhitespace(char[] ch, int offset, int length)
265 throws SAXException
266 {
267
268 }
269
270 /***
271 * DOCUMENT ME!
272 *
273 * @param uri DOCUMENT ME!
274 * @param localName DOCUMENT ME!
275 * @param qname DOCUMENT ME!
276 * @param attributes DOCUMENT ME!
277 *
278 * @throws SAXException DOCUMENT ME!
279 */
280 public void startElement(String uri, String localName, String qname, Attributes attributes)
281 throws SAXException
282 {
283 log.debug("startElement()");
284 log.debug("startElement(): localName is '" + localName + "'");
285
286
287 if (localName.equals("match")) {
288 log.debug("startElement(): creating new matcher");
289
290 match = new MagicMatch();
291
292 matcher = new MagicMatcher();
293 matcher.setMatch(match);
294 }
295
296
297 if (matcher != null) {
298 if (localName.equals("mimetype")) {
299 isMimeType = true;
300 } else if (localName.equals("extension")) {
301 isExtension = true;
302 } else if (localName.equals("description")) {
303 isDescription = true;
304 } else if (localName.equals("test")) {
305 isTest = true;
306
307 int length = attributes.getLength();
308
309 for (int i = 0; i < length; i++) {
310 String attrLocalName = attributes.getLocalName(i);
311 String attrValue = attributes.getValue(i);
312
313 if (attrLocalName.equals("offset")) {
314 if (!attrValue.equals("")) {
315 match.setOffset(new Integer(attrValue).intValue());
316 log.debug("startElement(): setting offset to '" + attrValue + "'");
317 }
318 } else if (attrLocalName.equals("length")) {
319 if (!attrValue.equals("")) {
320 match.setLength(new Integer(attrValue).intValue());
321 log.debug("startElement(): setting length to '" + attrValue + "'");
322 }
323 } else if (attrLocalName.equals("type")) {
324 match.setType(attrValue);
325 log.debug("startElement(): setting type to '" + attrValue + "'");
326 } else if (attrLocalName.equals("bitmask")) {
327 if (!attrValue.equals("")) {
328 match.setBitmask(attrValue);
329 log.debug("startElement(): setting bitmask to '" + attrValue + "'");
330 }
331 } else if (attrLocalName.equals("comparator")) {
332 match.setComparator(attrValue);
333 log.debug("startElement(): setting comparator to '" + attrValue + "'");
334 }
335 }
336 } else if (localName.equals("property")) {
337 int length = attributes.getLength();
338 String name = null;
339 String value = null;
340
341 for (int i = 0; i < length; i++) {
342 String attrLocalName = attributes.getLocalName(i);
343 String attrValue = attributes.getValue(i);
344
345 if (attrLocalName.equals("name")) {
346 if (!attrValue.equals("")) {
347 name = attrValue;
348 }
349 } else if (attrLocalName.equals("value")) {
350 if (!attrValue.equals("")) {
351 value = attrValue;
352 }
353 }
354 }
355
356
357 if ((name != null) && (value != null)) {
358 if (properties == null) {
359 properties = new HashMap();
360 }
361
362 if (!properties.containsKey(name)) {
363 properties.put(name, value);
364 log.debug("startElement(): setting property '" + name + "'='" + value +
365 "'");
366 } else {
367 log.debug("startElement(): not setting property '" + name +
368 "', duplicate key");
369 }
370 }
371 } else if (localName.equals("match-list")) {
372 log.debug("startElement(): found submatcher list");
373
374
375
376 log.debug("startElement(): pushing current matcher to stack");
377 stack.add(matcher);
378 } else {
379
380 }
381 }
382 }
383
384 /***
385 * DOCUMENT ME!
386 *
387 * @param uri DOCUMENT ME!
388 * @param localName DOCUMENT ME!
389 * @param qname DOCUMENT ME!
390 *
391 * @throws SAXException DOCUMENT ME!
392 */
393 public void endElement(String uri, String localName, String qname)
394 throws SAXException
395 {
396 log.debug("endElement()");
397 log.debug("endElement(): localName is '" + localName + "'");
398
399
400 if (isMimeType) {
401 isMimeType = false;
402 match.setMimeType(finalValue);
403 log.debug("characters(): setting mimetype to '" + finalValue + "'");
404 } else if (isExtension) {
405 isExtension = false;
406 match.setExtension(finalValue);
407 log.debug("characters(): setting extension to '" + finalValue + "'");
408 } else if (isDescription) {
409 isDescription = false;
410 match.setDescription(finalValue);
411 log.debug("characters(): setting description to '" + finalValue + "'");
412 } else if (isTest) {
413 isTest = false;
414 match.setTest(convertOctals(finalValue));
415 log.debug("characters(): setting test to '" + convertOctals(finalValue) + "'");
416 } else {
417
418 }
419
420 finalValue = "";
421
422
423
424 if (localName.equals("match")) {
425
426 if (matcher.isValid()) {
427
428 match.setProperties(properties);
429
430
431 if (stack.size() == 0) {
432 log.debug("endElement(): adding root matcher");
433 matchers.add(matcher);
434 } else {
435
436
437 log.debug("endElement(): adding sub matcher");
438
439 MagicMatcher m = (MagicMatcher) stack.get(stack.size() - 1);
440 m.addSubMatcher(matcher);
441 }
442 } else {
443
444 log.info("endElement(): not adding invalid matcher '" + match.getDescription() +
445 "'");
446 }
447
448 matcher = null;
449 properties = null;
450
451
452 } else if (localName.equals("match-list")) {
453 if (stack.size() > 0) {
454 log.debug("endElement(): popping from the stack");
455 matcher = (MagicMatcher) stack.get(stack.size() - 1);
456
457 stack.remove(matcher);
458 }
459 } else if (localName.equals("mimetype")) {
460 isMimeType = false;
461 } else if (localName.equals("extension")) {
462 isExtension = false;
463 } else if (localName.equals("description")) {
464 isDescription = false;
465 } else if (localName.equals("test")) {
466 isTest = false;
467 }
468 }
469
470 /***
471 * DOCUMENT ME!
472 *
473 * @param ex DOCUMENT ME!
474 *
475 * @throws SAXException DOCUMENT ME!
476 */
477 public void warning(SAXParseException ex)
478 throws SAXException
479 {
480
481 }
482
483 /***
484 * DOCUMENT ME!
485 *
486 * @param ex DOCUMENT ME!
487 *
488 * @throws SAXException DOCUMENT ME!
489 */
490 public void error(SAXParseException ex)
491 throws SAXException
492 {
493
494 throw ex;
495 }
496
497 /***
498 * DOCUMENT ME!
499 *
500 * @param ex DOCUMENT ME!
501 *
502 * @throws SAXException DOCUMENT ME!
503 */
504 public void fatalError(SAXParseException ex)
505 throws SAXException
506 {
507
508 throw ex;
509 }
510
511 /***
512 * replaces octal representations of bytes, written as \ddd to actual byte values.
513 *
514 * @param s a string with encoded octals
515 *
516 * @return string with all octals decoded
517 */
518 private ByteBuffer convertOctals(String s)
519 {
520 int beg = 0;
521 int end = 0;
522 int c1;
523 int c2;
524 int c3;
525 int chr;
526 ByteArrayOutputStream buf = new ByteArrayOutputStream();
527
528 while ((end = s.indexOf('//', beg)) != -1) {
529 if (s.charAt(end + 1) != '//') {
530
531 for (int z = beg; z < end; z++) {
532 buf.write((int) s.charAt(z));
533 }
534
535
536
537 if ((end + 4) <= s.length()) {
538 try {
539 chr = Integer.parseInt(s.substring(end + 1, end + 4), 8);
540
541
542
543
544
545 buf.write(chr);
546 beg = end + 4;
547 end = beg;
548 } catch (NumberFormatException nfe) {
549
550 buf.write((int) '//');
551 beg = end + 1;
552 end = beg;
553 }
554 } else {
555
556 buf.write((int) '//');
557 beg = end + 1;
558 end = beg;
559 }
560 } else {
561
562 buf.write((int) '//');
563 beg = end + 1;
564 end = beg;
565 }
566 }
567
568 if (end < s.length()) {
569 for (int z = beg; z < s.length(); z++) {
570 buf.write((int) s.charAt(z));
571 }
572 }
573
574 try {
575 log.debug("convertOctals(): returning buffer size '" + buf.size() + "'");
576
577 ByteBuffer b = ByteBuffer.allocate(buf.size());
578
579 return b.put(buf.toByteArray());
580 } catch (Exception e) {
581 log.error("convertOctals(): error parsing string: " + e);
582
583 return ByteBuffer.allocate(0);
584 }
585 }
586 }